Analysis_yhc

library(readr)
library(dplyr)
library(stringr)
library(ggplot2)
library(lubridate)
library(tidyr)
library(shiny)
library(plotly)

Import and clean data

data=read_csv("./data/weekly_deaths_by_state_and_causes.csv") 
general_data <- data |>
  janitor::clean_names() |>
  rename_with(~ str_replace_all(., " ", "_")) |>
  filter(jurisdiction_of_occurrence =="United States") |>
  rename_with(~ make.unique(str_replace(., "_\\w\\d.*", ""))) |>
  mutate(month = month(week_ending_date)) |>
  rename( covid_multiple_cause=covid,
          covid_underlying_cause=covid.1,
           symptoms_not_classified=symptoms_signs_and_abnormal_clinical_and_laboratory_findings_not_elsewhere_classified
          )


p_data <- read.delim("./data/Population by States.txt", 
                     header = TRUE, stringsAsFactors = FALSE) |>
  janitor::clean_names()

population_summary <- p_data |>
  filter(year_code >= 2020 & year_code <= 2023) |>
  group_by(year_code)  |> 
  summarise(Total_Population = sum(population, na.rm = TRUE)) |>
  rename(mmwr_year=year_code)

Proportion of death plot

death_proportion_data =general_data |> select(
    week_ending_date, 
    mmwr_year,
    covid_multiple_cause,
    covid_underlying_cause,
    septicemia,
    malignant_neoplasms,
    diabetes_mellitus,
    alzheimer_disease,
    influenza_and_pneumonia,
    chronic_lower_respiratory_diseases,
    other_diseases_of_respiratory_system,
    nephritis_nephrotic_syndrome_and_nephrosis,
    symptoms_not_classified,
    diseases_of_heart,
    cerebrovascular_diseases
  ) |>
  gather(key = "cause_of_death", value = "death_count", 
         covid_multiple_cause,
    covid_underlying_cause,
         septicemia,
         malignant_neoplasms,
         diabetes_mellitus,
         alzheimer_disease,
         influenza_and_pneumonia,
         chronic_lower_respiratory_diseases,
         other_diseases_of_respiratory_system,
         nephritis_nephrotic_syndrome_and_nephrosis,
         symptoms_not_classified,
         diseases_of_heart,
         cerebrovascular_diseases
         ) |>
  group_by(week_ending_date, mmwr_year, cause_of_death) |>
  summarize(death_count = sum(death_count, na.rm = TRUE)) |>
  group_by(week_ending_date, mmwr_year) |>
  mutate(total_deaths = sum(death_count, na.rm = TRUE)) |>
  mutate(proportion = death_count / total_deaths) |>
  arrange(week_ending_date, cause_of_death, desc(proportion))|>
  ungroup()

proportion_plot <- plot_ly(
  data = death_proportion_data,
  x = ~week_ending_date,
  y = ~proportion,
  color = ~cause_of_death,
  colors = RColorBrewer::brewer.pal(12, "Set3"),
  type = 'bar',
  hoverinfo = 'x+y+name',  
  marker = list(opacity = 0.7)  
) |>
  layout(
    title = "Changes in the Proportion of Each Cause of Death in Total Mortality Over Four Years",
    xaxis = list(
      title = "Week Ending Date",
      tickformat = "%Y",  
      dtick = 31536000000  
    ),
    yaxis = list(
      title = "Proportion of Total Deaths"
    ),
    legend = list(
      orientation = "v",  
      x = 1.2, 
      y=1,
      xanchor = "center",
       font = list(size = 5)  
    ),
    plot_bgcolor = 'rgba(240,240,240,0.9)',  
    margin = list(t = 50), 
    showlegend = TRUE,
    barmode = 'stack'  
  )


proportion_plot
proportion_plot2 <- plot_ly(
  data = death_proportion_data,
  x = ~week_ending_date,
  y = ~proportion,
  color = ~cause_of_death,
  colors = RColorBrewer::brewer.pal(12, "Set3"),
  type = 'scatter',  
  mode = 'lines',   
  line = list(opacity = 0.7, width = 2),  
  hoverinfo = 'x+y+name' 
) |>
  layout(
    title = "Changes in the Proportion of Each Cause of Death Over Four Years",
    xaxis = list(
      title = "Week Ending Date",
      tickformat = "%Y", 
      dtick = 31536000000  
    ),
    yaxis = list(
      title = "Proportion of Total Deaths",
      tickformat = ".1%"  
    ),
    legend = list(
      orientation = "v",  
      x = 1.3,
      y = 1,
      xanchor = "center",
      font = list(size = 8)  
    ),
    plot_bgcolor = 'rgba(240,240,240,0.9)', 
    margin = list(t = 50), 
    showlegend = TRUE
  )

proportion_plot2
proportion_plot3 <- plot_ly(
  data = death_proportion_data,
  x = ~week_ending_date,
  y = ~proportion,
  color = ~cause_of_death,
  colors = RColorBrewer::brewer.pal(12, "Set3"),
  type = 'scatter',  
  mode = 'lines',    
  fill = 'tozeroy',  
  line = list(opacity = 1, width = 2),  
  hoverinfo = 'x+y+name'  
) |>
  layout(
    title = "Proportion of Each Cause of Death Over Time",
    xaxis = list(
      title = "Week Ending Date",
      tickformat = "%Y",  
      dtick = 31536000000  
    ),
    yaxis = list(
      title = "Proportion of Total Deaths",
      tickformat = ".1%"  
    ),
    legend = list(
      orientation = "v",  
      x = 1.3,
      y = 1,
      xanchor = "center",
      font = list(size = 8)  
    ),
    plot_bgcolor = 'rgba(240,240,240,0.9)',  
    margin = list(t = 50), 
    showlegend = TRUE
  )

proportion_plot3

The plots above show that among all natural causes of death, excluding COVID-19, the highest proportions are attributed to heart disease and malignant neoplasms. Notably, in 2020 and 2021, these two causes exhibited an inverse relationship with COVID-19 deaths. This phenomenon could be explained by evidence of COVID-19 infection, whether virological or clinical, in the days or weeks leading up to death in patients with heart disease or malignant neoplasms. Such deaths were likely coded as COVID-19 during certification, resulting in an apparent decrease in the reported numbers of deaths from heart disease and malignant neoplasms.

Pie-chart

pie_data <- death_proportion_data |>
  filter(!cause_of_death %in% c("all_cause", "natural_cause"))

# Define UI for Shiny app
ui <- fluidPage(
  titlePanel("Death Proportions for Different Years"),
  
  # Layout: two rows, each containing two pie charts
  fluidRow(
    column(6, plotlyOutput("pie_2020")),
    column(6, plotlyOutput("pie_2021"))
  ),
  fluidRow(
    column(6, plotlyOutput("pie_2022")),
    column(6, plotlyOutput("pie_2023"))
  )
)

# Define server for Shiny app

server <- function(input, output, session) {
  

  # Function to create a pie chart for each year
  pie_chart <- function(year) {
    pie_plot <- pie_data |>
      filter(year == year) |>
      plot_ly(
        labels = ~cause_of_death,
        values = ~proportion,
        type = 'pie',
        textinfo = 'label+percent',
        showlegend = TRUE
      ) |>
      layout(
        title = paste("Proportion of Death Causes in", year),  
        showlegend = FALSE)
       
    return(pie_plot)
  }

  # Render the pie chart for each year
  output$pie_2020 = renderPlotly({ pie_chart(2020) })
  output$pie_2021 = renderPlotly({ pie_chart(2021) })
  output$pie_2022 = renderPlotly({ pie_chart(2022) })
  output$pie_2023 = renderPlotly({ pie_chart(2023) })
}

# Run the Shiny app
shinyApp(ui = ui, server = server)

In the pie chart, it is evident that the leading causes of death are heart disease and malignant neoplasms, followed by COVID-19.

Change of proportion plot

# death by disease
death_sum_data <- death_proportion_data |>
  group_by(mmwr_year, cause_of_death) |>
  summarise(total_deaths = sum(death_count, na.rm = TRUE)) |>
  ungroup()

death_proportion_data <- death_sum_data |>
  group_by(mmwr_year) |>
  mutate(total_deaths_year = sum(total_deaths, na.rm = TRUE)) |>
  ungroup() |>
  mutate(death_proportion = total_deaths / total_deaths_year)  

# death change
death_change_data <- death_proportion_data |>
  arrange(cause_of_death, mmwr_year) |>
  group_by(cause_of_death) |>
  mutate(
    lag_proportion = lag(death_proportion),  
    proportion_change = death_proportion - lag_proportion  
  ) |>
  ungroup()

# top 5 in change
top_changes_data <- death_change_data |>
  group_by(mmwr_year) |>
  slice_max(order_by = abs(proportion_change), n = 5) |>
  ungroup()


proportion_plot = plot_ly(
  data = top_changes_data,
  x = ~as.factor(mmwr_year),  
  y = ~proportion_change,  
  color = ~cause_of_death,  
  colors = RColorBrewer::brewer.pal(12, "Set3"),
  type = 'bar', 
  hoverinfo = 'x+y+name',  
  marker = list(opacity = 0.7)  
) %>%
  layout(
    title = "Top 5 Causes of Death with the Largest Change in Proportion by Year",
    xaxis = list(
      title = "Year",
      tickmode = "linear",  
      dtick = 1  
    ),
    yaxis = list(
      title = "Change in Proportion of Total Deaths"
    ),
    legend = list(
      orientation = "v", 
      x = 1.15, 
      y = 1,  
      xanchor = "center",
      font = list(size = 5)  
    ),
    plot_bgcolor = 'rgba(240,240,240,0.9)',  
    margin = list(t = 50),  
    showlegend = TRUE,
    barmode = 'group'  
  )

proportion_plot

It can be observed that for COVID-19, its proportion of total deaths increased from 2020 to 2021 and then declined for the next two consecutive years. In contrast, heart disease and malignant neoplasms showed the opposite trend, with their proportions of total deaths decreasing from 2020 to 2021 and subsequently rising over the following two years.

Thoughts about framework

What can we learn from death?

Have you ever thought about which week you are most likely to die in a year? Time Trend Analysis: By examining the fluctuations in death rates across the 52 weeks of a year, we can identify peak mortality periods and try to find underlying causes (death peaks and seasonal illnesses, public health crises).

Weekly trends might look different depending on where you are, as mortality rates are influenced by both environmental and healthcare system factors.

National and Regional Level Analysis Explore how death might change across time and regions.

City-Level Analysis Narrowing it down to a city like New York